This document provides a few direction for the visualization of geographical connections. The dataset is available on github. It lists about 13k tweets containing the #surf hashtag. This tweets have been recovered on a 10 months period and you can learn more on this project dedicated page.
The dataset provides longitude and latitude for both the home location of tweeters, and their instant geolocation as well. Basically it looks like that:
# Libraries
library(tidyverse)
library(hrbrthemes)
library(viridis)
library(DT)
library(kableExtra)
options(knitr.table.format = "html")
#load(url("https://github.com/holtzy/About-Surfers-On-Twitter/raw/master/DATA/Surf_Hashtag_Data.Rdata"))
#data <- data %>% select(homename, homecontinent, homecountry, homelat, homelon, travelcontinent, travelcountry, travellat, travellon) %>% na.omit()
#write.table(data, file="/Users/y.holtz/Dropbox/data_to_viz/Example_dataset/19_MapConnection.csv", quote=TRUE, row.names=FALSE, sep=",")
# Load dataset from github (Surfer project)
data <- read.table("https://github.com/holtzy/data_to_viz/raw/master/Example_dataset/19_MapConnection.csv", header=T, sep=",")
# Show long format
tmp <- data %>%
tail(5) %>%
mutate(homename = gsub( ", Western Australia", "", homename)) %>%
mutate(homename = gsub( ", France", "", homename)) %>%
mutate(homename = gsub( " - Bali - Indonesia", "", homename)) %>%
mutate(homelat=round(homelat,1), homelon=round(homelon,1), travellat=round(travellat,1), travellon=round(travellon,1)) %>%
select(homename, homelat, homelon, travelcountry, travellat, travellon)
tmp %>% kable() %>% kable_styling(bootstrap_options = "striped", full_width = F)| homename | homelat | homelon | travelcountry | travellat | travellon |
|---|---|---|---|---|---|
| Bridgetown | -34.0 | 116.1 | Australia | -34.2 | 115.0 |
| Lille | 50.6 | 3.1 | France | 45.0 | -1.2 |
| MX | 23.6 | -102.6 | Mexico | 21.0 | -101.2 |
| Kuta | -8.7 | 115.2 | Indonesia | -8.7 | 115.2 |
| Kuta | -8.7 | 115.2 | Indonesia | -8.7 | 115.2 |
Before showing all the relationships provided in this dataset, it is important to understand how to visualize a unique connection on a map. It is a common practice to link 2 points using the shortest route between them instead of a straight line. It is called great circles. A special care is given for situations where cities are very far from each other and where the shortest connection thus passes behind the map.
Here are the connection between 8 cities on a world map:
don=rbind(
Buenos_aires=c(-58,-34),
Paris=c(2,49),
Melbourne=c(145,-38),
Saint.Petersburg=c(30.32, 59.93),
Abidjan=c(-4.03, 5.33),
Montreal=c(-73.57, 45.52),
Nairobi=c(36.82, -1.29),
Salvador=c(-38.5, -12.97)
) %>% as.data.frame()
colnames(don)=c("long","lat")
all_pairs=cbind(t(combn(don$long, 2)), t(combn(don$lat, 2))) %>% as.data.frame()
colnames(all_pairs)=c("long1","long2","lat1","lat2")
plot_my_connection=function( dep_lon, dep_lat, arr_lon, arr_lat, ...){
inter <- gcIntermediate(c(dep_lon, dep_lat), c(arr_lon, arr_lat), n=50, addStartEnd=TRUE, breakAtDateLine=F)
inter=data.frame(inter)
diff_of_lon=abs(dep_lon) + abs(arr_lon)
if(diff_of_lon > 180){
lines(subset(inter, lon>=0), ...)
lines(subset(inter, lon<0), ...)
}else{
lines(inter, ...)
}
}
library(maps)
library(geosphere)
# background map
par(mar=c(0,0,0,0))
map('world',col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05,mar=rep(0,4),border=0, ylim=c(-80,80) )
# add every connections:
for(i in 1:nrow(all_pairs)){
plot_my_connection(all_pairs$long1[i], all_pairs$lat1[i], all_pairs$long2[i], all_pairs$lat2[i], col="skyblue", lwd=1)
}
# add points and names of cities
points(x=don$long, y=don$lat, col="slateblue", cex=2, pch=20)
text(rownames(don), x=don$long, y=don$lat, col="slateblue", cex=1, pos=4)library(tidyverse)
library(jpeg)
library(maps)
library(geosphere)
library(grid)# Download NASA night lights image
download.file("https://www.nasa.gov/specials/blackmarble/2016/globalmaps/BlackMarble_2016_01deg.jpg", destfile = "IMG/BlackMarble_2016_01deg.jpg", mode = "wb")
# Load picture and render
earth <- readJPEG("IMG/BlackMarble_2016_01deg.jpg", native = TRUE)
earth <- rasterGrob(earth, interpolate = TRUE)# Count how many times we have each unique connexion + order by importance
summary=data %>%
count(homelat,homelon,homecontinent, travellat,travellon,travelcontinent) %>%
#filter(n>1) %>%
arrange(n)
summary=head(summary, 1000)
# A function that makes a dateframe per connection (we will use these connections to plot each lines)
data_for_connection=function( dep_lon, dep_lat, arr_lon, arr_lat, group){
inter <- gcIntermediate(c(dep_lon, dep_lat), c(arr_lon, arr_lat), n=50, addStartEnd=TRUE, breakAtDateLine=F)
inter=data.frame(inter)
inter$group=NA
diff_of_lon=abs(dep_lon) + abs(arr_lon)
if(diff_of_lon > 180){
inter$group[ which(inter$lon>=0)]=paste(group, "A",sep="")
inter$group[ which(inter$lon<0)]=paste(group, "B",sep="")
}else{
inter$group=group
}
return(inter)
}
# Création d'un dataframe complet avec les points de toutes les lignes à faire.
data_ready_plot=data.frame()
for(i in c(1:nrow(summary))){
tmp=data_for_connection(summary$homelon[i], summary$homelat[i], summary$travellon[i], summary$travellat[i] , i)
tmp$homecontinent=summary$homecontinent[i]
tmp$n=summary$n[i]
data_ready_plot=rbind(data_ready_plot, tmp)
}
data_ready_plot$homecontinent=factor(data_ready_plot$homecontinent, levels=c("Asia","Europe","Australia","Africa","North America","South America","Antarctica"))ggplot() +
annotation_custom(earth, xmax = 180, ymin = -Inf, ymax = Inf) +
geom_line(data=data_ready_plot, size=0.5, aes(x=lon, y=lat, group=group, colour=homecontinent, alpha=n)) +
xlim(-170, 185)+ ylim(-59, 80) +
theme(
legend.position="none",
#plot.margin = unit(rep(-1.25,4),"lines"),
plot.background = element_rect(fill = "#00001C"),
panel.background = element_rect(fill='#00001C',colour='#00001C' ),
panel.grid=element_blank(),
panel.border = element_blank(),
axis.title=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank()
) ## Warning: Removed 426 rows containing missing values (geom_path).
A work by Yan Holtz for data-to-viz.com